In [1]:
import pandas as pd
import pandas.rpy.common as com
import numpy as np
from sklearn.feature_extraction import DictVectorizer
%load_ext autoreload
%autoreload 2
%load_ext rmagic
%matplotlib inline
%precision 2
pd.set_option('display.precision', 3)
import ndl
In [2]:
%%R
library(ndl)
In [3]:
# Fetch the `numbers` data set through the pandas <-> R bridge.
data = com.load_data('numbers')

# Cue strings are '_'-delimited; store each one as a list of cue names.
data['Cues'] = [cue_string.split('_') for cue_string in data['Cues']]

# Keep the original outcome values under a separate column name, because
# later cells overwrite `Outcomes` with category labels.
data['Number'] = data['Outcomes']

data
Out[3]:
In [4]:
def activation(W, frame=None):
    """Compute activations for every row of a cue table.

    Parameters
    ----------
    W : association weights, passed unchanged to ``ndl.activation``.
    frame : table with a ``Cues`` column, optional.
        Defaults to the notebook-level ``data`` so that existing
        ``activation(W)`` calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per row of ``frame``, sharing ``frame``'s index. Each row is
        whatever ``ndl.activation`` returns for that row's cues
        (presumably one value per outcome label -- confirm against ``ndl``).
    """
    if frame is None:
        # Fall back to the global table, as the original implementation did.
        frame = data
    per_row = [ndl.activation(cues, W) for cues in frame.Cues]
    return pd.DataFrame(per_row, index=frame.index)
In [5]:
# Singular / plural labelling: every row is 'plural' except the row keyed 1.
# The labels are built in a standalone Series and assigned to the frame in one
# step, instead of the chained `data['Outcomes'][1] = ...`, which only works
# when the intermediate Series happens to be a view of the frame.
# NOTE(review): the key 1 is resolved by Series indexing exactly as in the
# chained form (as a label when the index holds integers) -- confirm the
# index type of `data`.
outcomes = pd.Series('plural', index=data.index)
outcomes[1] = 'singular'
data['Outcomes'] = outcomes
data
Out[5]:
In [6]:
# Learn association weights from the current labelling with M=10.
W = ndl.rw(data, M=10)

# Activations for every row of the table under those weights.
A = activation(W)

A
Out[6]:
With these associations, how many of the 15 items will the learner correctly label?
In [7]:
# Side-by-side view of the true label, the label with the highest activation,
# and whether the two agree, one row per item.
truth = data['Outcomes']
predicted = A.idxmax(1)
comparison = pd.DataFrame(
    [truth, predicted, predicted == truth],
    index=['Truth', 'Prediction', 'Accurate?'],
)
comparison.T
Out[7]:
In [8]:
# Unweighted share of items whose highest-activation label matches the truth.
(A.idxmax(1) == data['Outcomes']).mean()
Out[8]:
How often is the learner correct when each item is weighted by its relative frequency?
In [9]:
# Frequency-weighted accuracy: total frequency of correctly labelled items
# divided by the total frequency of all items.
is_correct = A.idxmax(1) == data['Outcomes']
weighted_hits = sum(data['Frequency'] * is_correct)
total_frequency = sum(data['Frequency'])
float(weighted_hits) / float(total_frequency)
Out[9]:
In [10]:
def accuracy(data, M):
    """Train on ``data`` with ``M`` trials and score the result on the same table.

    Parameters
    ----------
    data : table with ``Cues`` and ``Outcomes`` columns; passed to ``ndl.rw``.
    M : forwarded to ``ndl.rw`` as its ``M`` argument.

    Returns
    -------
    The proportion of rows whose highest-activation column equals the row's
    ``Outcomes`` entry.
    """
    W = ndl.rw(data, M=M)
    # Build the activations from the table that was passed in. Delegating to
    # the notebook helper `activation(W)` would read the global `data`
    # instead, silently ignoring this function's own argument.
    A = pd.DataFrame(
        [ndl.activation(cues, W) for cues in data.Cues],
        index=data.index,
    )
    return np.mean(A.idxmax(1) == data['Outcomes'])
In [11]:
# Accuracy of a single freshly trained learner with M=10.
accuracy(data, M=10)
Out[11]:
For a population of 100 learners trying to acquire the number system, what proportion are able to successfully label all 15 items given M trials?
In [12]:
# Train 100 independent learners with M=10 and record, for each, whether it
# labels every item correctly (accuracy exactly 1); the mean of those flags is
# the proportion of fully successful learners.
perfect_flags = [accuracy(data, M=10) == 1 for learner in xrange(100)]
np.mean(perfect_flags)
Out[12]:
In [13]:
def population_accuracy(M=10, pop=100):
    """Proportion of ``pop`` learners that reach an accuracy of exactly 1.

    Each learner is one call to ``accuracy`` on the notebook-level ``data``
    with the given ``M``; whatever labels ``data['Outcomes']`` holds at call
    time are the ones being learned.
    """
    perfect_flags = []
    for learner in xrange(pop):
        perfect_flags.append(accuracy(data, M=M) == 1)
    return np.mean(perfect_flags)
In [14]:
# Exclusive upper bound on the M values swept in the learning-curve cells.
MAX_TRIALS = 500

# Learning curves, keyed by a short name for the number system.
P = dict()
In [15]:
# Learning curve for the labels currently in data['Outcomes']:
# one population_accuracy value for each M from 1 to MAX_TRIALS - 1.
trial_counts = xrange(1, MAX_TRIALS)
P['sg / pl'] = [population_accuracy(M=trials) for trials in trial_counts]
In [16]:
import matplotlib.pyplot as plt

# Plot the stored curve against 1-based positions (entry k was computed with M=k).
curve = P['sg / pl']
trial_numbers = range(1, len(curve) + 1)
plt.plot(trial_numbers, curve, '-', linewidth=2)
plt.title('Singular / plural distinction')
plt.xlabel('Trial Number')
plt.suptitle('Proportion of 100 learners who label all 15 items correctly')
Out[16]:
In [17]:
# Dual / non-dual labelling: every row is 'notdual' except the row keyed 2.
# The labels are built in a standalone Series and assigned to the frame in one
# step, instead of the chained `data['Outcomes'][2] = ...`, which only works
# when the intermediate Series happens to be a view of the frame.
# NOTE(review): the key 2 is resolved by Series indexing exactly as in the
# chained form (as a label when the index holds integers) -- confirm the
# index type of `data`.
outcomes = pd.Series('notdual', index=data.index)
outcomes[2] = 'dual'
data['Outcomes'] = outcomes
data
Out[17]:
In [18]:
# Learning curve for the labels currently in data['Outcomes']:
# one population_accuracy value for each M from 1 to MAX_TRIALS - 1.
trial_counts = xrange(1, MAX_TRIALS)
P['du / non-du'] = [population_accuracy(M=trials) for trials in trial_counts]
In [19]:
# Plot the stored curve against 1-based positions (entry k was computed with M=k).
curve = P['du / non-du']
trial_numbers = range(1, len(curve) + 1)
plt.plot(trial_numbers, curve, '-', linewidth=2)
plt.title('Dual / non-dual distinction')
plt.xlabel('Trial Number')
plt.suptitle('Proportion of 100 learners who label all 15 items correctly')
Out[19]:
In [20]:
# Singular / dual / plural labelling: rows keyed 1 and 2 get their own labels,
# every other row is 'plural'.
# The labels are built in a standalone Series and assigned to the frame in one
# step, instead of chained `data['Outcomes'][k] = ...` writes, which only work
# when the intermediate Series happens to be a view of the frame.
# NOTE(review): integer keys are resolved by Series indexing exactly as in the
# chained form (as labels when the index holds integers) -- confirm the index
# type of `data`.
outcomes = pd.Series('plural', index=data.index)
outcomes[1] = 'singular'
outcomes[2] = 'dual'
data['Outcomes'] = outcomes
In [21]:
# Learning curve for the labels currently in data['Outcomes']:
# one population_accuracy value for each M from 1 to MAX_TRIALS - 1.
trial_counts = xrange(1, MAX_TRIALS)
P['sg / du / pl'] = [population_accuracy(M=trials) for trials in trial_counts]
In [22]:
# Plot the stored curve against 1-based positions (entry k was computed with M=k).
curve = P['sg / du / pl']
trial_numbers = range(1, len(curve) + 1)
plt.plot(trial_numbers, curve, '-', linewidth=2)
plt.title('Singular / dual / plural distinction')
plt.xlabel('Trial Number')
plt.suptitle('Proportion of 100 learners who label all 15 items correctly')
Out[22]:
In [23]:
# Singular / dual / trial / plural labelling: rows keyed 1, 2 and 3 get their
# own labels, every other row is 'plural'.
# The labels are built in a standalone Series and assigned to the frame in one
# step, instead of chained `data['Outcomes'][k] = ...` writes, which only work
# when the intermediate Series happens to be a view of the frame.
# NOTE(review): integer keys are resolved by Series indexing exactly as in the
# chained form (as labels when the index holds integers) -- confirm the index
# type of `data`.
outcomes = pd.Series('plural', index=data.index)
outcomes[1] = 'singular'
outcomes[2] = 'dual'
outcomes[3] = 'trial'
data['Outcomes'] = outcomes
In [24]:
# Learning curve for the labels currently in data['Outcomes']:
# one population_accuracy value for each M from 1 to MAX_TRIALS - 1.
trial_counts = xrange(1, MAX_TRIALS)
P['sg / du / tr / pl'] = [population_accuracy(M=trials) for trials in trial_counts]
In [25]:
# Plot the stored curve against 1-based positions (entry k was computed with M=k).
curve = P['sg / du / tr / pl']
trial_numbers = range(1, len(curve) + 1)
plt.plot(trial_numbers, curve, '-', linewidth=2)
plt.title('Singular / dual / trial / plural distinction')
plt.xlabel('Trial Number')
plt.suptitle('Proportion of 100 learners who label all 15 items correctly')
Out[25]:
In [26]:
# Five-way labelling: rows keyed 1 through 4 get their own labels, every other
# row is 'plural'.
# The labels are built in a standalone Series and assigned to the frame in one
# step, instead of chained `data['Outcomes'][k] = ...` writes, which only work
# when the intermediate Series happens to be a view of the frame.
# NOTE(review): integer keys are resolved by Series indexing exactly as in the
# chained form (as labels when the index holds integers) -- confirm the index
# type of `data`.
outcomes = pd.Series('plural', index=data.index)
outcomes[1] = 'singular'
outcomes[2] = 'dual'
outcomes[3] = 'trial'
outcomes[4] = '4ial'
data['Outcomes'] = outcomes
In [27]:
# Learning curve for the labels currently in data['Outcomes']:
# one population_accuracy value for each M from 1 to MAX_TRIALS - 1.
trial_counts = xrange(1, MAX_TRIALS)
P['sg / du / tr / qu / pl'] = [population_accuracy(M=trials) for trials in trial_counts]
In [28]:
# Plot the stored curve against 1-based positions (entry k was computed with M=k).
curve = P['sg / du / tr / qu / pl']
trial_numbers = range(1, len(curve) + 1)
plt.plot(trial_numbers, curve, '-', linewidth=2)
# Title fixed: the separator between "quadral" and "plural" was missing,
# unlike the other per-system titles in this notebook.
plt.title('Singular / dual / trial / quadral / plural distinction')
plt.xlabel('Trial Number')
plt.suptitle('Proportion of 100 learners who label all 15 items correctly')
Out[28]:
One prediction is that the typology of number systems should roughly correspond to how learnable each type of system is.
In [29]:
# Overlay every stored learning curve on one set of axes, in a fixed order so
# that line colours are assigned consistently.
systems = (
    'sg / pl',
    'sg / du / pl',
    'sg / du / tr / pl',
    'du / non-du',
    'sg / du / tr / qu / pl',
)
for system in systems:
    curve = P[system]
    plt.plot(range(1, len(curve) + 1), curve, '-', linewidth=1.5, label=system)
plt.suptitle('Proportion of 100 learners who label all 15 items correctly')
plt.xlabel('Trials')
# Axes-fraction coordinates with a negative x place the legend left of the plot.
plt.legend(loc=(-0.55,0.5))
Out[29]: